This study compares the vowel distributions of native and non-native speakers of Chinese.
vocal tract
Google @datalorax
## # A tibble: 6 x 14 ## X__1 gender height age Mean yi yu wu ye wo en e ## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 NS1 F 163 29 F1 270 326 320 583 553 730 634 ## 2 NS1 F 163 29 F2 3041 2678 927 2548 872 1803 1078 ## 3 NS2 F 164 30 F1 321 307 341 537 563 710 482 ## 4 NS2 F 164 30 F2 2909 2383 836 2347 958 1863 1201 ## 5 NS3 F 165 27 F1 300 316 321 547 472 740 553 ## 6 NS3 F 165 27 F2 2899 2481 776 2694 821 1812 1282 ## # ... with 2 more variables: ai <dbl>, ao <dbl>
tidyr::spread and tidyr::gather
names(df)
## [1] "X__1" "gender" "height" "age" "Mean" "yi" "yu" ## [8] "wu" "ye" "wo" "en" "e" "ai" "ao"
janitor::clean_names
print(df$X__1)
## [1] "NS1" "NS1" "NS2" "NS2" "NS3" "NS3" "NS4" "NS4" "NS5" "NS5" ## [11] "NS6" "NS6" "NNS1" "NNS1" "NNS2" "NNS2" "NNS3" "NNS3" "NNS4" "NNS4" ## [21] "NNS5" "NNS5" "NNS6" "NNS6"
tidyr::separate
# dplyr::case_when ?
# Clean the raw sheet: normalise the column names, split the speaker label
# (e.g. "NNS3") into a group code and a within-group number, and give every
# speaker a unique numeric id.
tidy_df1 <- df %>%
  clean_names(case = "snake") %>%
  # Split between the last letter and the first digit ("NNS3" -> "NNS", "3").
  # sep = -1 also works as long as every speaker number is a single digit.
  separate(x_1, into = c("group", "id"),
           sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  mutate(
    id = as.numeric(id),
    # NS speakers keep ids 1-6; NNS speakers are shifted past them (7-12)
    # so that ids stay unique across the two groups.
    id = case_when(
      group == "NNS" ~ id + 6,
      TRUE ~ id
    ),
    group = as.factor(group)
  )
# Stack the vowel columns into long format: one row per
# speaker x formant x vowel. The first six columns are identifiers and
# are left untouched.
tidy_df2 <- tidy_df1 %>%
  gather(vowel, value, -1:-6) %>%
  mutate(vowel = as.factor(vowel))

head(tidy_df2)
## # A tibble: 6 x 8 ## group id gender height age mean vowel value ## <fct> <dbl> <chr> <dbl> <dbl> <chr> <fct> <dbl> ## 1 NS 1 F 163 29 F1 yi 270 ## 2 NS 1 F 163 29 F2 yi 3041 ## 3 NS 2 F 164 30 F1 yi 321 ## 4 NS 2 F 164 30 F2 yi 2909 ## 5 NS 3 F 165 27 F1 yi 300 ## 6 NS 3 F 165 27 F2 yi 2899
# Give each formant its own column (F1, F2) so that every row is one vowel
# produced by one speaker.
tidy_df <- tidy_df2 %>%
  spread(mean, value) %>%
  # reorder variables & discard gender
  select(id, group, age, height, vowel, F1, F2) %>%
  arrange(id)

head(tidy_df)
## # A tibble: 6 x 7 ## id group age height vowel F1 F2 ## <dbl> <fct> <dbl> <dbl> <fct> <dbl> <dbl> ## 1 1 NS 29 163 ai 808 1802 ## 2 1 NS 29 163 ao 832 1531 ## 3 1 NS 29 163 e 634 1078 ## 4 1 NS 29 163 en 730 1803 ## 5 1 NS 29 163 wo 553 872 ## 6 1 NS 29 163 wu 320 927
vocal tract
# First draft of the vowel plot: F2 against F1, coloured by vowel,
# with one panel per speaker group.
figure1_pre <- ggplot(tidy_df, aes(x = F2, y = F1, color = vowel)) +
  geom_point(size = 3) +
  facet_wrap(~ group) +
  theme_classic()
# Refined vowel plot: same layers as the first draft, plus reversed axes,
# a fixed legend order and a title.
figure1_post <- ggplot(tidy_df, aes(x = F2, y = F1, color = vowel)) +
  geom_point(size = 3) +
  facet_wrap(~ group) +
  theme_classic() +
  # reverse both axes so the layout matches how the sounds are perceived
  scale_x_reverse() +
  scale_y_reverse() +
  # list the vowels in the legend in IPA order
  scale_color_discrete(
    breaks = c("yi", "yu", "wu", "ye", "wo", "en", "e", "ai", "ao")
  ) +
  labs(title = "Vowel Distribution among All Non-native and Native Speakers")
# Mean and standard deviation of F1 and F2 for every group x vowel cell.
# A named list replaces the deprecated funs(); the output columns are still
# F1_mean, F2_mean, F1_sd, F2_sd (in that order), which later code selects
# by position. The result stays grouped by `group`.
smry_df <- tidy_df %>%
  group_by(group, vowel) %>%
  summarize_at(vars(F1, F2), list(mean = mean, sd = sd))
# Group means only: each vowel is drawn as a text label at its mean
# (F2, F1) position, coloured by speaker group.
figure2 <- ggplot(smry_df, aes(x = F2_mean, y = F1_mean)) +
  # the label text itself marks the position, so no point layer is needed
  geom_label(aes(label = vowel, color = group)) +
  scale_x_reverse() +
  scale_y_reverse() +
  theme_classic() +
  labs(
    title = "Mean Vowel Distribution between Non-native and Native Speakers",
    x = "F2",
    y = "F1"
  )
# Combined view: individual tokens (points), a per-vowel ellipse, and the
# group means (labels) in a single panel.
figure3 <-
  ggplot(data = tidy_df, aes(x = F2, y = F1)) +
  # group means come from the summary table, hence the layer-specific data
  geom_label(data = smry_df,
             aes(x = F2_mean, y = F1_mean, label = vowel, fill = group),
             alpha = 0.2) +
  geom_point(aes(color = vowel, shape = group),
             size = 3, alpha = 0.4) +
  stat_ellipse(aes(color = vowel), level = 0.67) +
  scale_x_reverse() +
  scale_y_reverse() +
  theme_classic() +
  labs(title = "Individual and Group Vowel Distribution",
       subtitle = "The size of output ellipses in standard deviations.") +
  # hide the colour legend; the labels already identify each vowel.
  # "none" replaces the deprecated `FALSE`.
  guides(color = "none")
# Render the summary as an HTML table rounded to two decimals.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
smry_df %>%
  kable(format = "html", digits = 2, caption = "Table1", booktabs = TRUE)
| group | vowel | F1_mean | F2_mean | F1_sd | F2_sd |
|---|---|---|---|---|---|
| NNS | ai | 913.33 | 1513.83 | 47.14 | 161.28 |
| NNS | ao | 901.50 | 1377.00 | 63.86 | 107.69 |
| NNS | e | 640.33 | 1702.33 | 70.43 | 238.62 |
| NNS | en | 651.33 | 1980.00 | 88.59 | 166.70 |
| NNS | wo | 551.50 | 1043.67 | 49.79 | 61.90 |
| NNS | wu | 416.00 | 1122.83 | 69.62 | 196.96 |
| NNS | ye | 520.67 | 2320.00 | 65.50 | 95.07 |
| NNS | yi | 335.67 | 2646.50 | 46.03 | 100.61 |
| NNS | yu | 321.50 | 1806.83 | 31.25 | 186.00 |
| NS | ai | 910.33 | 1655.50 | 115.38 | 142.20 |
| NS | ao | 848.00 | 1305.17 | 59.50 | 147.96 |
| NS | e | 596.83 | 1289.67 | 105.92 | 152.62 |
| NS | en | 717.67 | 1850.50 | 40.10 | 101.96 |
| NS | wo | 554.50 | 908.67 | 48.53 | 76.63 |
| NS | wu | 335.00 | 840.00 | 15.63 | 56.02 |
| NS | ye | 556.83 | 2486.50 | 38.02 | 128.39 |
| NS | yi | 308.33 | 2916.17 | 30.23 | 73.78 |
| NS | yu | 309.17 | 2420.83 | 15.17 | 237.25 |
# Same summary table with kable()'s default formatting.
smry_df %>%
  kable()
| group | vowel | F1_mean | F2_mean | F1_sd | F2_sd |
|---|---|---|---|---|---|
| NNS | ai | 913.3333 | 1513.8333 | 47.13668 | 161.27544 |
| NNS | ao | 901.5000 | 1377.0000 | 63.86157 | 107.68658 |
| NNS | e | 640.3333 | 1702.3333 | 70.43484 | 238.62411 |
| NNS | en | 651.3333 | 1980.0000 | 88.59044 | 166.69973 |
| NNS | wo | 551.5000 | 1043.6667 | 49.78654 | 61.89884 |
| NNS | wu | 416.0000 | 1122.8333 | 69.61609 | 196.95930 |
| NNS | ye | 520.6667 | 2320.0000 | 65.49707 | 95.07471 |
| NNS | yi | 335.6667 | 2646.5000 | 46.03332 | 100.60765 |
| NNS | yu | 321.5000 | 1806.8333 | 31.24580 | 186.00367 |
| NS | ai | 910.3333 | 1655.5000 | 115.37533 | 142.20373 |
| NS | ao | 848.0000 | 1305.1667 | 59.50462 | 147.96272 |
| NS | e | 596.8333 | 1289.6667 | 105.91962 | 152.62066 |
| NS | en | 717.6667 | 1850.5000 | 40.10320 | 101.96225 |
| NS | wo | 554.5000 | 908.6667 | 48.52937 | 76.62550 |
| NS | wu | 335.0000 | 840.0000 | 15.63330 | 56.02142 |
| NS | ye | 556.8333 | 2486.5000 | 38.01798 | 128.39120 |
| NS | yi | 308.3333 | 2916.1667 | 30.23023 | 73.77917 |
| NS | yu | 309.1667 | 2420.8333 | 15.17124 | 237.25212 |
# Wide table of mean F1: one row per group, one column per vowel.
# Columns 1:3 of smry_df are group, vowel and F1_mean.
table_f1mean <- smry_df %>%
  select(1:3) %>%
  spread(vowel, F1_mean)

table_f1mean
## # A tibble: 2 x 10 ## # Groups: group [2] ## group ai ao e en wo wu ye yi yu ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 NNS 913. 902. 640. 651. 552. 416 521. 336. 322. ## 2 NS 910. 848 597. 718. 554. 335 557. 308. 309.
# Wide table of mean F2: one row per group, one column per vowel.
# Columns 1, 2 and 4 of smry_df are group, vowel and F2_mean.
table_f2mean <- smry_df %>%
  select(1:2, 4) %>%
  spread(vowel, F2_mean)

table_f2mean
## # A tibble: 2 x 10 ## # Groups: group [2] ## group ai ao e en wo wu ye yi yu ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 NNS 1514. 1377 1702. 1980 1044. 1123. 2320 2646. 1807. ## 2 NS 1656. 1305. 1290. 1850. 909. 840 2486. 2916. 2421.
# Stack the F1 rows on top of the F2 rows (two rows each).
bindtables <- bind_rows(table_f1mean, table_f2mean)

bindtables
## # A tibble: 4 x 10 ## # Groups: group [2] ## group ai ao e en wo wu ye yi yu ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 NNS 913. 902. 640. 651. 552. 416 521. 336. 322. ## 2 NS 910. 848 597. 718. 554. 335 557. 308. 309. ## 3 NNS 1514. 1377 1702. 1980 1044. 1123. 2320 2646. 1807. ## 4 NS 1656. 1305. 1290. 1850. 909. 840 2486. 2916. 2421.
# Label which formant each pair of rows holds; the second row of each pair
# is left blank so the label reads like a merged cell.
bindtables$Mean <- c("F1", "", "F2", "")

bindtables
## # A tibble: 4 x 11 ## # Groups: group [2] ## group ai ao e en wo wu ye yi yu Mean ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> ## 1 NNS 913. 902. 640. 651. 552. 416 521. 336. 322. F1 ## 2 NS 910. 848 597. 718. 554. 335 557. 308. 309. "" ## 3 NNS 1514. 1377 1702. 1980 1044. 1123. 2320 2646. 1807. F2 ## 4 NS 1656. 1305. 1290. 1850. 909. 840 2486. 2916. 2421. ""
# Move the formant label to the first column. Selecting by name avoids
# the hard-coded positions c(11, 1:10), which break if a vowel column is
# added or removed.
bindtables_new <- bindtables %>%
  select(Mean, everything())

bindtables_new
## # A tibble: 4 x 11 ## # Groups: group [2] ## Mean group ai ao e en wo wu ye yi yu ## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 F1 NNS 913. 902. 640. 651. 552. 416 521. 336. 322. ## 2 "" NS 910. 848 597. 718. 554. 335 557. 308. 309. ## 3 F2 NNS 1514. 1377 1702. 1980 1044. 1123. 2320 2646. 1807. ## 4 "" NS 1656. 1305. 1290. 1850. 909. 840 2486. 2916. 2421.
# Render the combined F1/F2 table as HTML, rounded to two decimals.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
bindtables_new %>%
  kable(format = "html", digits = 2, caption = "Table 1", booktabs = TRUE)
| Mean | group | ai | ao | e | en | wo | wu | ye | yi | yu |
|---|---|---|---|---|---|---|---|---|---|---|
| F1 | NNS | 913.33 | 901.50 | 640.33 | 651.33 | 551.50 | 416.00 | 520.67 | 335.67 | 321.50 |
| | NS | 910.33 | 848.00 | 596.83 | 717.67 | 554.50 | 335.00 | 556.83 | 308.33 | 309.17 |
| F2 | NNS | 1513.83 | 1377.00 | 1702.33 | 1980.00 | 1043.67 | 1122.83 | 2320.00 | 2646.50 | 1806.83 |
| | NS | 1655.50 | 1305.17 | 1289.67 | 1850.50 | 908.67 | 840.00 | 2486.50 | 2916.17 | 2420.83 |

An ideal table
# Install linguisticsdown from CRAN.
# NOTE(review): when no CRAN mirror is configured (as in the recorded run)
# this call stops with "trying to use CRAN without setting a mirror";
# supply one via the `repos` argument or options(repos = ...).
install.packages("linguisticsdown")
## Error in contrib.url(repos, "source"): trying to use CRAN without setting a mirror
# Install linguisticsdown from the GitHub repository
# liao961120/linguisticsdown (requires the devtools package).
devtools::install_github("liao961120/linguisticsdown")

Write IPA symbols with Phonetic Features
\newfontfamily\ipa{Doulos SIL}